{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import nest_asyncio\n",
    "import pandas as pd\n",
    "import phoenix as px\n",
    "from phoenix.experimental.evals.functions import llm_classify\n",
    "from phoenix.experimental.evals.models import OpenAIModel\n",
    "from phoenix.experimental.evals.templates.default_templates import (\n",
    "    HALLUCINATION_PROMPT_RAILS_MAP,\n",
    "    HALLUCINATION_PROMPT_TEMPLATE,\n",
    "    QA_PROMPT_RAILS_MAP,\n",
    "    QA_PROMPT_TEMPLATE,\n",
    "    RAG_RELEVANCY_PROMPT_RAILS_MAP,\n",
    "    RAG_RELEVANCY_PROMPT_TEMPLATE,\n",
    ")\n",
    "from phoenix.trace.dsl import SpanQuery\n",
    "from phoenix.trace.dsl.helpers import (\n",
    "    INPUT,\n",
    "    IO,\n",
    "    IS_RETRIEVER,\n",
    "    IS_ROOT,\n",
    "    get_qa_with_reference,\n",
    "    get_retrieved_documents,\n",
    ")\n",
    "from phoenix.trace.semantic_conventions import (\n",
    "    DOCUMENT_CONTENT,\n",
    "    DOCUMENT_SCORE,\n",
    "    RETRIEVAL_DOCUMENTS,\n",
    ")\n",
    "\n",
    "# Patch asyncio to allow nested event loops; presumably required because the notebook\n",
    "# kernel already runs a loop while the evaluation helpers start their own - confirm.\n",
    "nest_asyncio.apply()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "from getpass import getpass\n",
    "\n",
    "import openai\n",
    "\n",
    "# Prefer the key from the environment; fall back to an interactive prompt when it is\n",
    "# unset or empty.\n",
    "openai_api_key = os.getenv(\"OPENAI_API_KEY\") or getpass(\"🔑 Enter your OpenAI API key: \")\n",
    "\n",
    "# Make the key visible both through the process environment and on the openai module.\n",
    "os.environ[\"OPENAI_API_KEY\"] = openai_api_key\n",
    "openai.api_key = openai_api_key"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Evaluation model passed to every llm_classify call later in this notebook.\n",
    "model = OpenAIModel(model_name=\"gpt-3.5-turbo-instruct\")\n",
    "# Call the model with a trivial prompt and display the reply as a quick check that it works.\n",
    "model(\"hi\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the \"llama_index_rag\" example traces.\n",
    "ds = px.load_example_traces(\"llama_index_rag\")\n",
    "# Launch the Phoenix app on those traces; the session handle is used by the span queries\n",
    "# later in this notebook.\n",
    "session = px.launch_app(trace=ds)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fetch the retrieved documents through a Phoenix client; docs_df is the input to the\n",
    "# relevancy evaluation. Presumably one row per retrieved document - confirm.\n",
    "docs_df = get_retrieved_documents(px.Client())\n",
    "docs_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run llm_classify over docs_df with the RAG relevancy template and its rails, asking the\n",
    "# model to explain each label.\n",
    "docs_eval = llm_classify(\n",
    "    dataframe=docs_df,\n",
    "    model=model,\n",
    "    template=RAG_RELEVANCY_PROMPT_TEMPLATE,\n",
    "    rails=[*RAG_RELEVANCY_PROMPT_RAILS_MAP.values()],\n",
    "    provide_explanation=True,\n",
    ")\n",
    "\n",
    "# Score is 1 for the \"relevant\" label and 0 for any other label. Rows with a null label are\n",
    "# dropped before the comparison, so index alignment leaves their score missing.\n",
    "docs_eval[\"score\"] = docs_eval.label.dropna().eq(\"relevant\").astype(int)\n",
    "docs_eval.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fetch the question/answer-with-reference frame through a Phoenix client; qa_df is the\n",
    "# input to both the QA and the hallucination evaluations.\n",
    "qa_df = get_qa_with_reference(px.Client())\n",
    "qa_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run llm_classify over qa_df with the QA template and its rails, asking the model to\n",
    "# explain each label.\n",
    "qa_eval = llm_classify(\n",
    "    dataframe=qa_df,\n",
    "    model=model,\n",
    "    template=QA_PROMPT_TEMPLATE,\n",
    "    rails=[*QA_PROMPT_RAILS_MAP.values()],\n",
    "    provide_explanation=True,\n",
    ")\n",
    "\n",
    "# Score is 1 for the \"correct\" label and 0 for any other label. Rows with a null label are\n",
    "# dropped before the comparison, so index alignment leaves their score missing.\n",
    "qa_eval[\"score\"] = qa_eval.label.dropna().eq(\"correct\").astype(int)\n",
    "qa_eval.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run llm_classify over qa_df with the hallucination template and its rails, asking the\n",
    "# model to explain each label.\n",
    "hallucination_eval = llm_classify(\n",
    "    dataframe=qa_df,\n",
    "    model=model,\n",
    "    template=HALLUCINATION_PROMPT_TEMPLATE,\n",
    "    rails=[*HALLUCINATION_PROMPT_RAILS_MAP.values()],\n",
    "    provide_explanation=True,\n",
    ")\n",
    "\n",
    "# Score is 1 for the \"factual\" label and 0 for any other label. Rows with a null label are\n",
    "# dropped before the comparison, so index alignment leaves their score missing.\n",
    "hallucination_eval[\"score\"] = hallucination_eval.label.dropna().eq(\"factual\").astype(int)\n",
    "hallucination_eval.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Query the spans matching IS_ROOT, selecting the columns named by IO (presumably the span\n",
    "# input and output values - confirm against phoenix.trace.dsl.helpers).\n",
    "session.query_spans(SpanQuery().select(**IO).where(IS_ROOT))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# For spans matching IS_RETRIEVER, select the INPUT columns and explode RETRIEVAL_DOCUMENTS,\n",
    "# exposing each document's content as `reference` and its score as `score`.\n",
    "# Presumably this yields one row per retrieved document - confirm against the SpanQuery docs.\n",
    "session.query_spans(\n",
    "    SpanQuery()\n",
    "    .select(**INPUT)\n",
    "    .explode(RETRIEVAL_DOCUMENTS, reference=DOCUMENT_CONTENT, score=DOCUMENT_SCORE)\n",
    "    .where(IS_RETRIEVER)\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run two span queries in one call and join the resulting frames column-wise on their index,\n",
    "# keeping only index values present in both (join=\"inner\").\n",
    "pd.concat(\n",
    "    session.query_spans(\n",
    "        # Query 1: the IO columns of spans matching IS_ROOT.\n",
    "        SpanQuery().select(**IO).where(IS_ROOT),\n",
    "        # Query 2: retrieved-document content concatenated into `reference`. span_id is taken\n",
    "        # from parent_id, presumably so rows line up with the parent span's id - confirm.\n",
    "        SpanQuery()\n",
    "        .select(span_id=\"parent_id\")\n",
    "        .concat(RETRIEVAL_DOCUMENTS, reference=DOCUMENT_CONTENT),\n",
    "    ),\n",
    "    axis=1,\n",
    "    join=\"inner\",\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
